# Load Statistical Analysis Functions
source("statistical_analysis_functions.R") # Load test functions
# Quietly move up one directory level from R/ to the project root
suppressMessages(setwd(".."))
# Load data files silently
suppressMessages(suppressWarnings(
tryCatch({
# Discovery cohort
HER2 <- read.csv("data/discovery_cohort/37POI-Her2.csv")
TN <- read.csv("data/discovery_cohort/37POI-TN.csv")
ERPR <- read.csv("data/discovery_cohort/37POI-ERPR.csv")
# Validation cohort
Luminal_A <- read.csv("data/validation_cohort/45POI-Luminal_A.csv")
Luminal_B <- read.csv("data/validation_cohort/45POI-Luminal_B.csv")
Basal <- read.csv("data/validation_cohort/45POI-Basal-like.csv")
HER2E <- read.csv("data/validation_cohort/45POI-Her2.csv")
}, error = function(e) {
stop("Error loading data files: ", e$message)
})
))##
## === DISCOVERY COHORT ===
##
## Analysing HER2
## ----------------------------------------
## Normality: 73.0% passed
## Linearity: 11 strong correlations
## Homoscedasticity: 90.5% passed
##
## Analysing Triple Negative
## ----------------------------------------
## Normality: 70.3% passed
## Linearity: 37 strong correlations
## Homoscedasticity: 93.7% passed
##
## Analysing ERPR
## ----------------------------------------
## Normality: 70.3% passed
## Linearity: 18 strong correlations
## Homoscedasticity: 93.5% passed
##
## === VALIDATION COHORT ===
##
## Analysing Luminal A
## ----------------------------------------
## Normality: 82.6% passed
## Linearity: 20 strong correlations
## Homoscedasticity: 91.8% passed
##
## Analysing Luminal B
## ----------------------------------------
## Normality: 73.9% passed
## Linearity: 30 strong correlations
## Homoscedasticity: 90.1% passed
##
## Analysing Basal-like
## ----------------------------------------
## Normality: 80.4% passed
## Linearity: 45 strong correlations
## Homoscedasticity: 87.4% passed
##
## Analysing HER2-enriched
## ----------------------------------------
## Normality: 82.6% passed
## Linearity: 152 strong correlations
## Homoscedasticity: 98.6% passed
# Calculate correlations
cor_HER2 <- cor(HER2[,-1], method="pearson")
cor_TN <- cor(TN[,-1], method="pearson")
cor_ERPR <- cor(ERPR[,-1], method="pearson")
# Discovery cohort plots and CPNE3 correlations
par(mfrow=c(2,2), oma=c(2,2,2,2)) # Set outer margins
plot_correlation_with_clusters(cor_HER2, "HER2 Subtype Correlations")
plot_correlation_with_clusters(cor_TN, "Triple Negative Subtype Correlations")
plot_correlation_with_clusters(cor_ERPR, "ER/PR Subtype Correlations")
# Print CPNE3 correlation analysis header
cat("\nCPNE3 CORRELATION ANALYSIS - DISCOVERY COHORTS\n")##
## CPNE3 CORRELATION ANALYSIS - DISCOVERY COHORTS
## ===================================================
##
## Significant CPNE3 correlations in HER2 cohort:
## ----------------------------------------
## Protein Correlation P-Value
## SLC25A3 0.753 1.182e-03
## CACYBP 0.718 2.573e-03
## PRKCSH 0.643 9.673e-03
## RPL19 0.640 1.016e-02
## BOLA2 0.626 1.249e-02
## CANX 0.551 3.315e-02
## LRPPRC 0.540 3.762e-02
## HSPE1 0.526 4.402e-02
## ATP5B 0.521 4.626e-02
## EIF5A 0.516 4.877e-02
##
## Significant CPNE3 correlations in Triple Negative cohort:
## ----------------------------------------
## Protein Correlation P-Value
## RHOC 0.806 2.763e-03
## CALML3 0.764 6.231e-03
## PRDX3 -0.705 1.550e-02
## CS 0.663 2.622e-02
##
## Significant CPNE3 correlations in ERPR cohort:
## ----------------------------------------
## Protein Correlation P-Value
## HIST1H4A -0.860 8.112e-05
## EIF5A -0.727 3.191e-03
## RPL19 -0.579 2.991e-02
# Calculate validation cohort correlations
cor_LumA <- cor(Luminal_A[,-1], method="pearson")
cor_LumB <- cor(Luminal_B[,-1], method="pearson")
cor_Basal <- cor(Basal[,-1], method="pearson")
cor_HER2E <- cor(HER2E[,-1], method="pearson")
# First create all plots
par(mfrow=c(2,2), oma=c(2,2,2,2)) # Set outer margins
plot_correlation_with_clusters(cor_LumA, "Luminal A Correlations")
plot_correlation_with_clusters(cor_LumB, "Luminal B Correlations")
plot_correlation_with_clusters(cor_Basal, "Basal-like Correlations")
plot_correlation_with_clusters(cor_HER2E, "HER2-enriched Correlations")# Print CPNE3 correlation analysis header
cat("\nCPNE3 CORRELATION ANALYSIS - VALIDATION COHORTS\n")##
## CPNE3 CORRELATION ANALYSIS - VALIDATION COHORTS
## ===================================================
# Get and display CPNE3 correlations for validation cohorts
luma_cpne3 <- get_cpne3_correlations(Luminal_A, "Luminal A")##
## Significant CPNE3 correlations in Luminal A cohort:
## ----------------------------------------
## Protein Correlation P-Value
## PPP6R2 0.486 1.874e-02
## KARS -0.415 4.921e-02
##
## Significant CPNE3 correlations in Luminal B cohort:
## ----------------------------------------
## Protein Correlation P-Value
## LRPPRC 0.505 5.213e-03
## EIF5A 0.425 2.165e-02
## VDAC2 0.417 2.432e-02
## RANBP1 0.393 3.481e-02
## PRDX3 0.371 4.754e-02
##
## Significant CPNE3 correlations in Basal-like cohort:
## ----------------------------------------
## Protein Correlation P-Value
##
## Significant CPNE3 correlations in HER2-enriched cohort:
## ----------------------------------------
## Protein Correlation P-Value
## HIST1H4A -0.849 1.557e-02
## PYGM 0.849 1.566e-02
## EIF5A 0.802 2.999e-02
# Calculate Spearman correlations
spear_HER2 <- cor(HER2[,-1], method="spearman")
spear_TN <- cor(TN[,-1], method="spearman")
spear_ERPR <- cor(ERPR[,-1], method="spearman")
# Discovery cohort Spearman plots
par(mfrow=c(2,2), oma=c(2,2,2,2)) # Set outer margins
plot_correlation_with_clusters(spear_HER2, "HER2 Subtype Spearman Correlations")
plot_correlation_with_clusters(spear_TN, "Triple Negative Subtype Spearman Correlations")
plot_correlation_with_clusters(spear_ERPR, "ER/PR Subtype Spearman Correlations")
# Print CPNE3 correlation analysis
cat("\nCPNE3 SPEARMAN CORRELATION ANALYSIS - DISCOVERY COHORTS\n")##
## CPNE3 SPEARMAN CORRELATION ANALYSIS - DISCOVERY COHORTS
## ===================================================
##
## Significant CPNE3 Spearman correlations in HER2 cohort:
## ----------------------------------------
## Protein Correlation P-Value
## BOLA2 0.696 3.921e-03
## CANX 0.579 2.385e-02
## HSPE1 0.571 2.606e-02
## PRKCSH 0.559 3.013e-02
## LRPPRC 0.552 3.278e-02
## KARS 0.552 3.289e-02
## RPL19 0.539 3.802e-02
##
## Significant CPNE3 Spearman correlations in Triple Negative cohort:
## ----------------------------------------
## Protein Correlation P-Value
## PRDX3 -0.864 6.117e-04
## APRT -0.729 1.093e-02
## RPL19 0.655 2.886e-02
## ATP5B -0.630 3.770e-02
## ATP5D -0.618 4.265e-02
##
## Significant CPNE3 Spearman correlations in ERPR cohort:
## ----------------------------------------
## Protein Correlation P-Value
## HIST1H4A -0.836 1.948e-04
## EIF5A -0.683 7.118e-03
# Calculate validation cohort Spearman correlations
spear_LumA <- cor(Luminal_A[,-1], method="spearman")
spear_LumB <- cor(Luminal_B[,-1], method="spearman")
spear_Basal <- cor(Basal[,-1], method="spearman")
spear_HER2E <- cor(HER2E[,-1], method="spearman")
# Validation cohort Spearman plots
par(mfrow=c(2,2), oma=c(2,2,2,2)) # Set outer margins
plot_correlation_with_clusters(spear_LumA, "Luminal A Spearman Correlations")
plot_correlation_with_clusters(spear_LumB, "Luminal B Spearman Correlations")
plot_correlation_with_clusters(spear_Basal, "Basal-like Spearman Correlations")
plot_correlation_with_clusters(spear_HER2E, "HER2-enriched Spearman Correlations")# Print CPNE3 correlation analysis
cat("\nCPNE3 SPEARMAN CORRELATION ANALYSIS - VALIDATION COHORTS\n")##
## CPNE3 SPEARMAN CORRELATION ANALYSIS - VALIDATION COHORTS
## ===================================================
##
## Significant CPNE3 Spearman correlations in Luminal A cohort:
## ----------------------------------------
## Protein Correlation P-Value
## KARS -0.456 2.893e-02
## PPP6R2 0.438 3.670e-02
## HIST1H1E -0.420 4.603e-02
## HIST1H1A -0.419 4.660e-02
##
## Significant CPNE3 Spearman correlations in Luminal B cohort:
## ----------------------------------------
## Protein Correlation P-Value
## LRPPRC 0.509 4.818e-03
## PRDX3 0.494 6.507e-03
## CS 0.459 1.234e-02
## VDAC2 0.447 1.498e-02
## RANBP1 0.393 3.489e-02
## EIF5A 0.380 4.185e-02
##
## Significant CPNE3 Spearman correlations in Basal-like cohort:
## ----------------------------------------
## Protein Correlation P-Value
##
## Significant CPNE3 Spearman correlations in HER2-enriched cohort:
## ----------------------------------------
## Protein Correlation P-Value
## EIF5A 0.857 1.370e-02
## APRT 0.821 2.345e-02
## PYGM 0.821 2.345e-02
## PPP6R2 -0.786 3.624e-02
# Create organized output directories in project root
dir.create("../outputs/correlation_plots", recursive = TRUE, showWarnings = FALSE)
dir.create("../outputs/results", recursive = TRUE, showWarnings = FALSE)
# Discovery cohort plots and correlations
pdf("../outputs/correlation_plots/discovery_cohort_correlations.pdf", width=12, height=12)
par(mfrow=c(2,2), oma=c(2,2,2,2)) # Set outer margins
suppressMessages(suppressWarnings({
plot_correlation_with_clusters(cor_HER2, "HER2 Subtype Correlations")
plot_correlation_with_clusters(cor_TN, "Triple Negative Subtype Correlations")
plot_correlation_with_clusters(cor_ERPR, "ER/PR Subtype Correlations")
}))
invisible(dev.off())
# Save validation cohort plots and correlations
pdf("../outputs/correlation_plots/validation_cohort_correlations.pdf", width=12, height=12)
par(mfrow=c(2,2), oma=c(2,2,2,2))
suppressMessages(suppressWarnings({
plot_correlation_with_clusters(cor_LumA, "Luminal A Correlations")
plot_correlation_with_clusters(cor_LumB, "Luminal B Correlations")
plot_correlation_with_clusters(cor_Basal, "Basal-like Correlations")
plot_correlation_with_clusters(cor_HER2E, "HER2-enriched Correlations")
}))
invisible(dev.off())
# Save discovery cohorts correlation analysis
discovery_results <- suppressMessages(suppressWarnings(
paste0(
"CPNE3 CORRELATION ANALYSIS - DISCOVERY COHORTS\n",
"=============================================\n",
capture.output(get_cpne3_correlations(HER2, "HER2")),
capture.output(get_cpne3_correlations(TN, "Triple Negative")),
capture.output(get_cpne3_correlations(ERPR, "ERPR"))
)
))
suppressMessages(writeLines(discovery_results, "../outputs/results/discovery_cohort_correlations.txt"))
# Save validation cohorts correlation analysis
validation_results <- suppressMessages(suppressWarnings(
paste0(
"CPNE3 CORRELATION ANALYSIS - VALIDATION COHORTS\n",
"=============================================\n",
capture.output(get_cpne3_correlations(Luminal_A, "Luminal A")),
capture.output(get_cpne3_correlations(Luminal_B, "Luminal B")),
capture.output(get_cpne3_correlations(Basal, "Basal-like")),
capture.output(get_cpne3_correlations(HER2E, "HER2-enriched"))
)
))
suppressMessages(writeLines(validation_results, "../outputs/results/validation_cohort_correlations.txt"))
# Save discovery cohort Spearman plots
pdf("../outputs/correlation_plots/discovery_cohort_spearman_correlations.pdf", width=12, height=12)
par(mfrow=c(2,2), oma=c(2,2,2,2))
suppressMessages(suppressWarnings({
plot_correlation_with_clusters(spear_HER2, "HER2 Subtype Spearman Correlations")
plot_correlation_with_clusters(spear_TN, "Triple Negative Subtype Spearman Correlations")
plot_correlation_with_clusters(spear_ERPR, "ER/PR Subtype Spearman Correlations")
}))
invisible(dev.off())
# Save discovery cohort Spearman correlation analysis
discovery_results <- suppressMessages(suppressWarnings(
paste0(
"CPNE3 SPEARMAN CORRELATION ANALYSIS - DISCOVERY COHORTS\n",
"===================================================\n",
capture.output(get_spearman_cpne3_correlations(HER2, "HER2")),
capture.output(get_spearman_cpne3_correlations(TN, "Triple Negative")),
capture.output(get_spearman_cpne3_correlations(ERPR, "ERPR"))
)
))
suppressMessages(writeLines(discovery_results, "../outputs/results/discovery_cohort_spearman_correlations.txt"))
# Save validation cohort Spearman plots
pdf("../outputs/correlation_plots/validation_cohort_spearman_correlations.pdf", width=12, height=12)
par(mfrow=c(2,2), oma=c(2,2,2,2))
suppressMessages(suppressWarnings({
plot_correlation_with_clusters(spear_LumA, "Luminal A Spearman Correlations")
plot_correlation_with_clusters(spear_LumB, "Luminal B Spearman Correlations")
plot_correlation_with_clusters(spear_Basal, "Basal-like Spearman Correlations")
plot_correlation_with_clusters(spear_HER2E, "HER2-enriched Spearman Correlations")
}))
invisible(dev.off())
# Save validation cohort Spearman correlation analysis
validation_results <- suppressMessages(suppressWarnings(
paste0(
"CPNE3 SPEARMAN CORRELATION ANALYSIS - VALIDATION COHORTS\n",
"===================================================\n",
capture.output(get_spearman_cpne3_correlations(Luminal_A, "Luminal A")),
capture.output(get_spearman_cpne3_correlations(Luminal_B, "Luminal B")),
capture.output(get_spearman_cpne3_correlations(Basal, "Basal-like")),
capture.output(get_spearman_cpne3_correlations(HER2E, "HER2-enriched"))
)
))
suppressMessages(writeLines(validation_results, "../outputs/results/validation_cohort_spearman_correlations.txt"))